In [5]:
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, sys, glob
In [2]:
# IPython automagic (equivalent to %ls): list the current working directory.
# Not valid outside an IPython/Jupyter kernel.
ls
In [4]:
def get_seqs_only(filename):
    """
    Read a FASTA file and return its sequences without the accession headers.

    Lines starting with ">" are treated as record headers and discarded.
    All non-blank lines between two headers are joined into a single
    sequence, so a record wrapped over several lines comes back as one
    string instead of one entry per line. Blank lines are ignored, and a
    header that is followed by no sequence text contributes nothing.
    Sequence text appearing before the first header is returned as its
    own entry.

    Parameters
    ----------
    filename : str
        Path to the FASTA file.

    Returns
    -------
    list of str
        One string per record, in file order.
    """
    seqs = []
    chunks = []  # pieces of the record currently being read
    with open(filename, 'r') as fh:
        # Iterate the handle directly rather than readlines() so the whole
        # file is never held in memory at once.
        for line in fh:
            if line.startswith(">"):
                # A header closes the previous record, if it had any text.
                if chunks:
                    seqs.append("".join(chunks))
                    chunks = []
                continue
            text = line.strip()
            if text:
                chunks.append(text)
    # Flush the last record: there is no trailing header to close it.
    if chunks:
        seqs.append("".join(chunks))
    return seqs
In [11]:
# Map a short group label to the list of sequences read from each FASTA
# file in the working directory. The label is built from the first and the
# third-from-last underscore-separated pieces of the file name.
seqs = {}
for fn in sorted(glob.glob("*.fasta")):
    parts = fn.split('_')
    name = '_'.join((parts[0], parts[-3]))
    seqs[name] = get_seqs_only(fn)
In [13]:
# Preview the first few sequences of one group. The original empty subscript
# `[]` is a SyntaxError; a bounded slice also avoids dumping the whole list.
seqs['archaea_ii'][:5]
Out[13]:
In [ ]: